This data set contains 7 FCS (flow cytometry standard) files. Each FCS file contains single cell data for one cell subset that is a well-established, phenotypically distinct population. This is mass cytometry data for healthy human PBMC (peripheral blood mononuclear cells). The populations were expert gated following a t-SNE analysis. The first section of the code will run two dimensionality reduction tools, UMAP and t-SNE, on the data set. Next, you will run FlowSOM on the both the UMAP and t-SNE axes to cluster, or group together, the various cell populations. Finally, you will run MEM to see enrichment scores for each of the FlowSOM clusters or populations that have been expert gated. The goal of this exercise is to run several computational tools on a single cell data set to get a feel for the workflow used in the Irish lab as well as compare the various analysis methods. The method for comparison of the cell populations by automated or manual analysis is RMSD.
# Time <10 sec
###### CONSTANTS TO SET ########
# cofactor for arcsinh transformation
COFACTOR = 15
# set seed for reproducible results
OVERALL_SEED = 43
# set FlowSOM target number of clusters
CLUSTER_NUM = 10
################################
# read files into R by setting working directory and directing R to the fcs files
setwd(paste(getwd(), "/datafiles/PBMC", sep = ""))
files <- dir(pattern = "*.fcs")
# convert and combine data for use in downstream analysis
data <- lapply(lapply(files, read.FCS), exprs)
combined.data = as.data.frame(do.call(rbind, mapply(
cbind, data, "cluster" = c(1:length(data)), SIMPLIFY = F)))
# choose channels with markers to use for downstream analysis and apply arcsinh transformation with a cofactor of 15
transformed.chosen.markers <- combined.data %>%
select(contains("-"),-contains("Ir")) %>%
mutate_all(function(x)
asinh(x / COFACTOR))
cat("\n\n...'data_preparation' finished running")
##
##
## ...'data_preparation' finished running
# Time ~5 min
set.seed(OVERALL_SEED)
# the line below will run t-SNE on the scaled surface markers (to see help page
# for t-SNE, type "?Rtsne -- enter" in console)
# you can view t-SNE progress by opening up the console below
mytSNE = Rtsne(
transformed.chosen.markers, # input scaled data
dims = 2, # number of final
# dimensions
initial_dims = length(transformed.chosen.markers), # number of initial
# dimensions
perplexity = 30, # perplexity (similar to # of nearest neighbors,
# will scale with data sets, cannot be greater than
# the number of events minus 1 divided by 3)
check_duplicates = FALSE,
max_iter = 1000, # number of iterations
verbose = TRUE
)
## Performing PCA
## Read the 49651 x 25 data matrix successfully!
## Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## - point 10000 of 49651
## - point 20000 of 49651
## - point 30000 of 49651
## - point 40000 of 49651
## Done in 40.88 seconds (sparsity = 0.002832)!
## Learning embedding...
## Iteration 50: error is 115.040738 (50 iterations in 12.58 seconds)
## Iteration 100: error is 115.040730 (50 iterations in 13.44 seconds)
## Iteration 150: error is 113.154074 (50 iterations in 15.88 seconds)
## Iteration 200: error is 102.145034 (50 iterations in 11.12 seconds)
## Iteration 250: error is 99.419584 (50 iterations in 10.23 seconds)
## Iteration 300: error is 4.858459 (50 iterations in 9.40 seconds)
## Iteration 350: error is 4.570469 (50 iterations in 9.31 seconds)
## Iteration 400: error is 4.391293 (50 iterations in 8.77 seconds)
## Iteration 450: error is 4.258306 (50 iterations in 8.48 seconds)
## Iteration 500: error is 4.153764 (50 iterations in 8.33 seconds)
## Iteration 550: error is 4.069274 (50 iterations in 8.46 seconds)
## Iteration 600: error is 3.998541 (50 iterations in 8.32 seconds)
## Iteration 650: error is 3.938060 (50 iterations in 8.73 seconds)
## Iteration 700: error is 3.885885 (50 iterations in 9.06 seconds)
## Iteration 750: error is 3.839568 (50 iterations in 8.81 seconds)
## Iteration 800: error is 3.799114 (50 iterations in 9.19 seconds)
## Iteration 850: error is 3.762185 (50 iterations in 9.16 seconds)
## Iteration 900: error is 3.728741 (50 iterations in 9.40 seconds)
## Iteration 950: error is 3.698379 (50 iterations in 9.36 seconds)
## Iteration 1000: error is 3.670783 (50 iterations in 8.92 seconds)
## Fitting performed in 196.94 seconds.
tsne.data = as.data.frame(mytSNE$Y)
cat("\n\n...'run_t-SNE' finished running")
##
##
## ...'run_t-SNE' finished running
# Time <10 sec
# setting aspect ratio for plots
range <- apply(apply(tsne.data, 2, range), 2, diff)
graphical.ratio.tsne <- (range[1] / range[2])
# t-SNE flat dot plot and density dot plot (1 dot = 1 cell)
tsne.plot <- data.frame(x = tsne.data[, 1], y = tsne.data[, 2])
# dot plot
ggplot(tsne.plot) + coord_fixed(ratio = graphical.ratio.tsne) +
geom_point(aes(x = x, y = y), cex = 0.3) + labs(x = "t-SNE 1", y = "t-SNE 2",
title = "PBMC Data on t-SNE Axes") +
theme_bw() +
labs(caption = "Data from Digggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# density dot plot
ggplot(tsne.plot, aes(x = x, y = y)) +
coord_fixed(ratio = graphical.ratio.tsne) + geom_bin2d(bins = 128) +
scale_fill_viridis_c(option = "A", trans = "sqrt") +
scale_x_continuous(expand = c(0.1, 0)) +
scale_y_continuous(expand = c(0.1, 0)) + labs(x = "t-SNE 1", y = "t-SNE 2",
title = "Density on t-SNE Axes") + theme_bw() +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cat("\n\n...'plot_t-SNE' finished running")
##
##
## ...'plot_t-SNE' finished running
# Time ~1 min
set.seed(OVERALL_SEED)
# Run UMAP on all scaled surface markers
# the line below will run UMAP on the data set (to see help page for UMAP, type
# "?UMAP -- enter" in console)
# you can view UMAP progress by opening up the console below
myumap <-
umap(transformed.chosen.markers, # input scaled data
n_neighbors = 15, # number of nearest neighbors to look at,
# scales with data set
n_threads = 1, # this makes UMAP reproducible
verbose = TRUE)
## 12:15:36 UMAP embedding parameters a = 1.896 b = 0.8006
## 12:15:36 Read 49651 rows and found 25 numeric columns
## 12:15:36 Using Annoy for neighbor search, n_neighbors = 15
## 12:15:36 Building Annoy index with metric = euclidean, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:15:40 Writing NN index file to temp file /var/folders/n_/4688pdns7rl8y7j8wcc9tvsc0000gn/T//RtmpUkVFH0/file6bf3200141b9
## 12:15:40 Searching Annoy index using 1 thread, search_k = 1500
## 12:15:49 Annoy recall = 100%
## 12:15:50 Commencing smooth kNN distance calibration using 1 thread
## 12:15:52 Initializing from normalized Laplacian + noise
## 12:16:01 Commencing optimization for 200 epochs, with 1112550 positive edges
## 12:16:26 Optimization finished
umap.data = as.data.frame(myumap)
cat("\n\n...'run_UMAP' finished running")
##
##
## ...'run_UMAP' finished running
# Time <10 sec
# setting aspect ratio for plots
range <- apply(apply(umap.data, 2, range), 2, diff)
graphical.ratio.umap <- (range[1] / range[2])
# UMAP flat dot plot and density dot plot (1 dot = 1 cell)
umap.plot <- data.frame(x = umap.data[, 1], y = umap.data[, 2])
# dot plot
ggplot(umap.plot) + coord_fixed(ratio = graphical.ratio.umap) +
geom_point(aes(x = x, y = y), cex = 0.3) + labs(x = "UMAP 1", y = "UMAP 2",
title = "PBMC Data on UMAP Axes") + theme_bw() +
labs(caption = "Data from Digggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
# density dot plot
ggplot(umap.plot, aes(x = x, y = y)) +
coord_fixed(ratio = graphical.ratio.umap) + geom_bin2d(bins = 128) +
scale_fill_viridis_c(option = "A", trans = "sqrt") +
scale_x_continuous(expand = c(0.1, 0)) +
scale_y_continuous(expand = c(0.1, 0)) + labs(x = "UMAP 1", y = "UMAP 2",
title = "Density on UMAP Axes") + theme_bw() +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cat("\n\n...'plot_UMAP' finished running")
##
##
## ...'plot_UMAP' finished running
# Time <10 sec
# create flowFrame for FlowSOM input (using t-SNE axes as input)
matrix <- as.matrix(tsne.data)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
# implement the FlowSOM on the data by running the line below (to see help page
# for FlowSOM, type "?FlowSOM --> enter" in console)
fsom <-
FlowSOM(
flowframe, # input flowframe
colsToUse = c(1:2), # columns to use
nClus = CLUSTER_NUM, # target number of clusters (this can be changed)
seed = OVERALL_SEED # set seed for reproducibility
)
FlowSOM.clusters.tsne <-
GetMetaclusters(fsom)
cat("\n\n...'run_FlowSOM_on_t-SNE' finished running")
##
##
## ...'run_FlowSOM_on_t-SNE' finished running
# Time <10 sec
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors,
rownames(qual_col_pals)))
# plot FlowSOM clusters on t-SNE axes
ggplot(tsne.plot) + coord_fixed(ratio=graphical.ratio.tsne) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.tsne), cex = 0.3) +
labs(x = "t-SNE 1", y = "t-SNE 2",title = "FlowSOM Clustering on t-SNE Axes",
color = "Cluster") + theme_bw() +
guides(colour = guide_legend(override.aes = list(size=4))) +
scale_color_manual(values = sample(col_vector)) +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cat("\n\n...'plot_t-SNE_FlowSOM_clusters' finished running")
##
##
## ...'plot_t-SNE_FlowSOM_clusters' finished running
# Time ~ 1-2 min
# run FlowSOM on the t-SNE axes while varying cluster number
for (i in seq(5,45,by = 10)){
matrix <- as.matrix(tsne.data)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
fsom <-
FlowSOM(
flowframe,
colsToUse = c(1:2),
nClus = i,
seed = OVERALL_SEED
)
FlowSOM.clusters.vary <-
GetMetaclusters(fsom)
# plot FlowSOM clusters on t-SNE axes
legend.col = round(max(as.numeric(as.vector(FlowSOM.clusters.vary)))/3)
print(ggplot(tsne.plot) + coord_fixed(ratio=graphical.ratio.tsne) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.vary), cex = 0.3) +
labs(x = "t-SNE 1", y = "t-SNE 2",title = "FlowSOM Clustering on t-SNE Axes",
color = "Cluster") + theme_bw() +
guides(colour = guide_legend(override.aes = list(size=4),
nrow = legend.col)) +
scale_color_manual(values = sample(col_vector)) +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()))}
cat("\n\n...'t-SNE_FlowSOM_varying_cluster_number' finished running")
##
##
## ...'t-SNE_FlowSOM_varying_cluster_number' finished running
# Time ~ 1 min
# create flowFrame for FlowSOM input (using orginal markers as input)
matrix <- as.matrix(transformed.chosen.markers)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
# implement the FlowSOM on the data by running the line below
fsom <-
FlowSOM(
flowframe,
colsToUse = c(1:ncol(transformed.chosen.markers)),
nClus = CLUSTER_NUM,
seed = OVERALL_SEED
)
FlowSOM.clusters.OG <-
GetMetaclusters(fsom)
# plot FlowSOM clusters on t-SNE axes
ggplot(tsne.plot) + coord_fixed(ratio=graphical.ratio.tsne) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.OG), cex = 0.2) +
labs(x = "t-SNE 1", y = "t-SNE 2",
title = "FlowSOM Clustering on Original Markers", color = "Cluster") +
theme_bw() + scale_color_manual(values = sample(col_vector)) +
guides(colour = guide_legend(override.aes = list(size=4))) +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cat("\n\n...'FlowSOM_on_original_markers' finished running")
##
##
## ...'FlowSOM_on_original_markers' finished running
# Time ~ 1-2 min
# run FlowSOM on original markers while varying cluster number
for (i in seq(5,45,by = 10)){
matrix <- as.matrix(transformed.chosen.markers)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
fsom <-
FlowSOM(
flowframe,
colsToUse = c(1:ncol(transformed.chosen.markers)),
nClus = i,
seed = OVERALL_SEED
)
FlowSOM.clusters.vary <-
GetMetaclusters(fsom)
# plot FlowSOM clusters on t-SNE axes
legend.col = round(max(as.numeric(as.vector(FlowSOM.clusters.vary)))/3)
print(ggplot(tsne.plot) + coord_fixed(ratio=graphical.ratio.tsne) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.vary), cex = 0.2) +
labs(x = "t-SNE 1", y = "t-SNE 2",title = "FlowSOM Clustering on Original Markers",
color = "Cluster") + theme_bw() +
guides(colour = guide_legend(override.aes = list(size=4),
nrow = legend.col)) +
scale_color_manual(values = sample(col_vector)) +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()))}
cat("\n\n...'original_markers_FlowSOM_varying_cluster_number' finished running")
##
##
## ...'original_markers_FlowSOM_varying_cluster_number' finished running
# Time <10 sec
# create flowFrame for FlowSOM input (using UMAP axes as input)
matrix <- as.matrix(umap.data)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
# implement the FlowSOM on the data by running the line below
fsom <-
FlowSOM(
flowframe,
colsToUse = c(1:2),
nClus = CLUSTER_NUM,
seed = OVERALL_SEED
)
FlowSOM.clusters.umap <-
GetMetaclusters(fsom)
# plot FlowSOM clusters on UMAP axes
ggplot(umap.plot) + coord_fixed(ratio=graphical.ratio.umap) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.umap), cex = 0.3) +
labs(x = "UMAP 1", y = "UMAP 2",title = "FlowSOM Clustering on UMAP Axes",
color = "Cluster") + theme_bw() +
guides(colour = guide_legend(override.aes = list(size=4)))+
scale_color_manual(values = sample(col_vector))+
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cat("\n\n...FlowSOM_on_UMAP' finished running")
##
##
## ...FlowSOM_on_UMAP' finished running
# Time ~ 1-2 min
# run FlowSOM on the UMAP axes while varying cluster number
for (i in seq(5,45,by = 10)){
matrix <- as.matrix(umap.data)
metadata <-
data.frame(name = dimnames(matrix)[[2]],
desc = dimnames(matrix)[[2]])
metadata$range <- apply(apply(matrix, 2, range), 2, diff)
metadata$minRange <- apply(matrix, 2, min)
metadata$maxRange <- apply(matrix, 2, max)
flowframe <- new("flowFrame",
exprs = matrix,
parameters = AnnotatedDataFrame(metadata))
fsom <-
FlowSOM(
flowframe,
colsToUse = c(1:2),
nClus = i,
seed = OVERALL_SEED
)
FlowSOM.clusters.vary <-
GetMetaclusters(fsom)
# plot FlowSOM clusters on UMAP axes
legend.col = round(max(as.numeric(as.vector(FlowSOM.clusters.vary)))/3)
print(ggplot(umap.plot) + coord_fixed(ratio=graphical.ratio.tsne) +
geom_point(aes(x=x, y=y, color=FlowSOM.clusters.vary), cex = 0.3) +
labs(x = "UMAP 1", y = "UMAP 2",title = "FlowSOM Clustering on UMAP Axes",
color = "Cluster") + theme_bw() +
guides(colour = guide_legend(override.aes = list(size=4),
nrow = legend.col)) +
scale_color_manual(values = sample(col_vector)) +
labs(caption = "Data from Diggins et al., Nat Methods 2017, 14: 275-278 \nFlow Repository: FR-FCM-ZY63") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()))}
cat("\n\n...'UMAP_FlowSOM_varying_cluster_number' finished running")
##
##
## ...'UMAP_FlowSOM_varying_cluster_number' finished running
# Time ~30 sec
# Run MEM on the FlowSOM clusters found from using t-SNE axes
cluster = as.numeric(as.vector((FlowSOM.clusters.tsne)))
MEM.data = cbind(transformed.chosen.markers, cluster)
MEM.values.tf = MEM(
MEM.data, # input data (last column must contain cluster values)
transform = FALSE, # data is already scaled in this case
cofactor = 1,
choose.markers = FALSE,
markers = "all", # use all transformed, chosen markers from previous
# selection
choose.ref = FALSE, # reference will be all other cells
zero.ref = FALSE,
rename.markers = FALSE,
new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56", # rename channels for labels
file.is.clust = FALSE,
add.fileID = FALSE,
IQR.thresh = NULL
)
# build MEM heatmap and output enrichment scores
build.heatmaps(
MEM.values.tf, # input MEM values
cluster.MEM = "both", # dendrogram for columns and rows
display.thresh = 2, # display threshold for MEM scores
newWindow.heatmaps = FALSE,
output.files = TRUE, # makes txt and PDF files for heatmap and MEM
# scores
labels = TRUE, # include labels in heatmap
only.MEMheatmap = FALSE
)
##
##
## 2:
## â–²CD4+9 CD3+4 CD45RA+3 CD44+2
## â–¼CD8-2
##
##
## 4:
## â–²CD4+8 CD3+3
## â–¼CD8-2 CD44-2
##
##
## 6:
## â–²CD4+9 CD3+3 CD44+2
## â–¼CD8-2 CD45RA-2
##
##
## 8:
## â–²CD4+9 CD3+3 CD44+2
## â–¼CD8-3 CD45RA-2
##
##
## 10:
## â–²CD123+6 HLA-DR+5 CD4+4 CD61+2 CD45RA+2
## â–¼CD3-8 CD8-2 CD45-2
##
##
## 5:
## â–²CD33+5 CD11c+5 CD61+4 CD14+4 CD11b+3 CD44+3 HLA-DR+3
## â–¼CD3-7 CD4-5 CD8-3
##
##
## 9:
## â–²CD11c+5 CD16+5 HLA-DR+3 CD123+2 CD33+2 CD44+2
## â–¼CD3-8 CD4-5 CD8-3
##
##
## 7:
## â–²CD16+6
## â–¼CD4-10 CD3-6 CD44-2
##
##
## 1:
## â–²HLA-DR+5 CD19+3 CD20+3 IgM+3
## â–¼CD4-10 CD3-7 CD8-4
##
##
## 3:
## â–²CD8+7 CD3+3 CD69+2
## â–¼CD4-10 CD38-2
cat("\n\n...run_MEM_on_FlowSOM_on_t-SNE' finished running")
##
##
## ...run_MEM_on_FlowSOM_on_t-SNE' finished running
# Time ~30 sec
cluster = as.numeric(as.vector((FlowSOM.clusters.OG)))
MEM.data = cbind(transformed.chosen.markers, cluster)
MEM.values.ogf = MEM(
MEM.data,
transform = FALSE,
cofactor = 1,
choose.markers = FALSE,
markers = "all",
choose.ref = FALSE,
zero.ref = FALSE,
rename.markers = FALSE,
new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56", # rename channels for labels
file.is.clust = FALSE,
add.fileID = FALSE,
IQR.thresh = NULL
)
build.heatmaps(
MEM.values.ogf,
cluster.MEM = "both",
display.thresh = 2,
newWindow.heatmaps = FALSE,
output.files = TRUE,
labels = TRUE,
only.MEMheatmap = FALSE
)
##
##
## 6:
## â–²CD123+6 HLA-DR+5 CD4+4 CD61+2 CD45RA+2
## â–¼CD3-8 CD8-2 CD45-2
##
##
## 8:
## â–²CD33+5 CD11c+5 CD61+4 CD14+4 CD11b+3 CD44+3 HLA-DR+3
## â–¼CD3-7 CD4-5 CD8-3
##
##
## 7:
## â–²HLA-DR+6 CD11c+5 CD33+4 CD44+2
## â–¼CD3-8 CD4-4 CD8-3 CD45RA-2 CD45-2
##
##
## 9:
## â–²CD11c+5 CD16+5 CD123+3 HLA-DR+3 CD45RA+2 CD33+2 CD44+2
## â–¼CD3-8 CD4-5 CD8-3
##
##
## 1:
## â–²CD16+6
## â–¼CD4-10 CD3-6 CD44-2
##
##
## 10:
## â–²CD61+5 HLA-DR+5 CD19+3 CD20+2 IgM+2
## â–¼CD3-7 CD4-6 CD8-3
##
##
## 4:
## â–²HLA-DR+5 CD19+3 CD20+3 IgM+3
## â–¼CD4-10 CD3-7 CD8-4
##
##
## 5:
## â–²CD38+5 HLA-DR+3 CD19+2 CD45RA+2
## â–¼CD4-9 CD3-8 CD8-3
##
##
## 3:
## â–²CD8+7 CD3+3 CD69+2
## â–¼CD4-10 CD38-2
##
##
## 2:
## â–²CD4+4 CD3+4 CD44+2
## â–¼CD16-9 CD8-7 CD11c-5 HLA-DR-5 CD69-3 CD11b-2 CD20-2
cat("\n\n...run_MEM_on_FlowSOM_on_original' finished running")
##
##
## ...run_MEM_on_FlowSOM_on_original' finished running
# Time ~30 sec
cluster = as.numeric(as.vector((FlowSOM.clusters.umap)))
MEM.data = cbind(transformed.chosen.markers, cluster)
MEM.values.uf = MEM(
MEM.data,
transform = FALSE,
cofactor = 1,
choose.markers = FALSE,
markers = "all",
choose.ref = FALSE,
zero.ref = FALSE,
rename.markers = FALSE,
new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56", # rename channels for labels
file.is.clust = FALSE,
add.fileID = FALSE,
IQR.thresh = NULL
)
build.heatmaps(
MEM.values.uf,
cluster.MEM = "both",
display.thresh = 2,
newWindow.heatmaps = FALSE,
output.files = TRUE,
labels = TRUE,
only.MEMheatmap = FALSE
)
##
##
## 7:
## â–²CD123+6 HLA-DR+5 CD4+4 CD61+2 CD45RA+2
## â–¼CD3-8 CD8-2 CD45-2
##
##
## 1:
## â–²CD33+5 CD11c+5 CD61+4 CD14+4 CD11b+3 CD44+3 HLA-DR+3
## â–¼CD3-7 CD4-5 CD8-3
##
##
## 2:
## â–²CD11c+5 CD16+5 HLA-DR+3 CD123+2 CD33+2 CD44+2
## â–¼CD3-8 CD4-5 CD8-3
##
##
## 3:
## â–²CD16+6
## â–¼CD4-10 CD3-6 CD44-2
##
##
## 5:
## â–²HLA-DR+5 CD19+3 CD20+3 IgM+3
## â–¼CD4-10 CD3-7 CD8-4
##
##
## 6:
## â–²CD38+5 CD45RA+3 HLA-DR+3 CD19+2
## â–¼CD4-9 CD3-8 CD8-3
##
##
## 4:
## â–²CD8+7 CD3+3 CD69+2
## â–¼CD4-10 CD38-2
##
##
## 9:
## â–²CD4+4 CD3+4 CD44+2
## â–¼CD16-9 CD8-7 CD11c-5 HLA-DR-5 CD69-3 CD11b-2 CD20-2
cat("\n\n...run_MEM_on_FlowSOM_on_UMAP' finished running")
##
##
## ...run_MEM_on_FlowSOM_on_UMAP' finished running
# Time ~30 sec
MEM.values.orig = MEM(
combined.data,
transform = TRUE,
cofactor = 15,
choose.markers = FALSE,
markers = "12:20,22:23,25:33,35:36,38:40",
choose.ref = FALSE,
zero.ref = FALSE,
rename.markers = FALSE,
new.marker.names = "CD19,CD117,CD11b,CD4,CD8,CD20,CD34,CD61,CD123,CD45RA,CD45,CD10,CD33,CD11c,CD14,CD69,CD15,CD16,CD44,CD38,CD25,CD3,IgM,HLA-DR,CD56",
file.is.clust = FALSE,
add.fileID = FALSE,
IQR.thresh = NULL
)
build.heatmaps(
MEM.values.orig,
cluster.MEM = "both",
display.thresh = 2,
newWindow.heatmaps = FALSE,
output.files = TRUE,
labels = TRUE,
only.MEMheatmap = FALSE
)
##
##
## 2:
## â–²CD8+7 CD3+3 CD69+2
## â–¼CD4-10 CD38-2
##
##
## 6:
## â–²CD33+5 CD11c+5 CD61+4 CD14+4 CD11b+3 CD44+3 HLA-DR+3
## â–¼CD3-7 CD4-5 CD8-3
##
##
## 3:
## â–²CD11c+4 HLA-DR+4 CD123+2 CD16+2
## â–¼CD3-8 CD4-3 CD8-3
##
##
## 7:
## â–²CD16+6
## â–¼CD4-10 CD3-6 CD44-2
##
##
## 5:
## â–²HLA-DR+5 IgM+4 CD19+3 CD20+3
## â–¼CD4-10 CD3-8 CD8-4
##
##
## 4:
## â–²HLA-DR+4 CD19+3 CD20+3
## â–¼CD4-9 CD3-8 CD8-3
##
##
## 1:
## â–²CD4+4 CD3+4 CD44+2
## â–¼CD16-9 CD8-7 CD11c-5 HLA-DR-5 CD69-3 CD11b-2 CD20-2
cat("\n\n...run_MEM_on_manually_gated_pops' finished running")
##
##
## ...run_MEM_on_manually_gated_pops' finished running
# RMSD to compare labels from all populations (FlowSOM clusters vs. manually
# gated populations)
orig.MEM.scores = as.data.frame(MEM.values.orig[[5]])
rownames(orig.MEM.scores) = paste0(rownames(orig.MEM.scores), " (Manual)")
ogf.MEM.scores = as.data.frame(MEM.values.ogf[[5]])
rownames(ogf.MEM.scores) = paste0(rownames(ogf.MEM.scores), " (OG/fSOM)")
uf.MEM.scores = as.data.frame(MEM.values.uf[[5]])
rownames(uf.MEM.scores) = paste0(rownames(uf.MEM.scores), ' (UMAP/fSOM)')
tf.MEM.scores = as.data.frame(MEM.values.tf[[5]])
rownames(tf.MEM.scores) = paste0(rownames(tf.MEM.scores), ' (t-SNE/fSOM)')
all.MEM.values = as.matrix(rbind(orig.MEM.scores, ogf.MEM.scores, uf.MEM.scores, tf.MEM.scores))
RMSD_vals <-
MEM_RMSD(
all.MEM.values, # input all MEM values from clustering and
# expert gating
format = NULL,
newWindow.heatmaps = FALSE,
output.matrix = TRUE
)
cat("\n\n...run_RMSD_on_clusters' finished running")
##
##
## ...run_RMSD_on_clusters' finished running